---
title: "Identifying Price Informativeness"
author: "Eduardo Davila^[Yale] & Cecilia Parlatore^[NYU Stern]"
date: "`r Sys.Date()`"
output:
  html_document: default
  pdf_document:  default
---

```{r echo=FALSE, include=FALSE}

library(here); library(tidyverse); library(cowplot); library(kableExtra); library(moments); library(texreg); library(DescTools); library(stargazer)
# Anchor every relative path used below at the project root found by `here`.
# NOTE(review): when this document is knitted, knitr may restore the working
# directory after this chunk; confirm that later chunks still resolve
# "output/..." and "intermediate/..." against the intended directory.
path <- here::here()
print(path)
setwd(path)
rm(path)

# Intermediate results produced by earlier steps of the pipeline.
load("intermediate/data_selected.RData")
load("intermediate/io_share.RData")
load("intermediate/parameters.RData")
load("intermediate/results_PS.RData")
load("intermediate/results_rolling_clean.RData")

source("functions/sc_ivol.R")

# Strongly prefer fixed over scientific notation when numbers are printed.
options(scipen = 999)

# Create the table output folder. `recursive = TRUE` also creates a missing
# "output/" parent, and `showWarnings = FALSE` makes the call a silent no-op
# when the folder already exists, so no fragile `file.exists()` test on a path
# with a trailing slash is needed.
dir.create("output/output_tables", recursive = TRUE, showWarnings = FALSE)

```

## PS

```{r}

# Export a fitted regression as a LaTeX table through stargazer.
#
# reg:      fitted model object; must expose `$coefficients` (named) and a
#           `summary()` whose coefficient matrix holds the test statistic in
#           its third column.
# namefile: file stem; the table is written to
#           output/output_tables/<namefile>.tex
fn_rawreg <- function(reg, namefile) {
  # Build LaTeX-safe covariate labels from the coefficient names: every "X" is
  # rendered as a times sign and underscores are escaped. str_replace_all() is
  # vectorised, so no index loop is needed (and an empty coefficient vector is
  # handled without the 1:0 pitfall of `1:length()`).
  coeffs <- names(reg$coefficients)
  coeffs <- str_replace_all(coeffs, "X", " $\\\\times$ ")
  coeffs <- str_replace_all(coeffs, "_", "\\\\_")

  # Test statistics taken from the model summary are reported in place of
  # stargazer's defaults.
  t_stats <- summary(reg)$coefficients[, 3]

  stargazer(reg,
            out = paste0("output/output_tables/", namefile, ".tex"),
            covariate.labels = coeffs,
            single.row = TRUE,
            header = FALSE,
            keep.stat = c("n", "rsq"),
            t = t_stats)
}

# One LaTeX file per raw regression object: table_IA1 ... table_IA4.
fn_rawreg(reg = reg_long_PS,   namefile = "table_IA1")
fn_rawreg(reg = reg_short_PS,  namefile = "table_IA2")
fn_rawreg(reg = reg_long_eps,  namefile = "table_IA3")
fn_rawreg(reg = reg_short_eps, namefile = "table_IA4")


```

## Yearly moments

```{r}
  
# Tabulate yearly cross-sectional moments of tau_pi_R and save them as LaTeX.
#
# input_df: data frame with columns permno, year_end and tau_pi_R.
# namefile: file stem; it also selects which years are reported
#           ("table_1" / "table_IA7": every fifth year, "table_IA5": 1994 on).
# The table is written to output/output_tables/<namefile>.tex.
fn_yearly_moments <- function(input_df, namefile){

  # Keep strictly positive estimates from 1985 onwards and collapse to one
  # value per firm-year (mean over multiple observations within the year).
  firm_year <- input_df %>%
    filter(tau_pi_R > 0, year_end >= 1985) %>%
    group_by(permno, year_end) %>%
    summarize(tau_pi_R = mean(tau_pi_R, na.rm = TRUE), .groups = "drop")

  # Cross-sectional distribution by year.
  results <- firm_year %>%
    group_by(year_end) %>%
    summarize(
      Median = median(tau_pi_R, na.rm = TRUE),
      Mean   = mean(tau_pi_R, na.rm = TRUE),
      SD     = sd(tau_pi_R, na.rm = TRUE),
      Skew   = skewness(tau_pi_R, na.rm = TRUE),
      Kurt   = kurtosis(tau_pi_R, na.rm = TRUE) - 3, # minus three yields excess kurtosis
      P5     = quantile(tau_pi_R, 0.05, na.rm = TRUE, names = FALSE),
      P25    = quantile(tau_pi_R, 0.25, na.rm = TRUE, names = FALSE),
      P75    = quantile(tau_pi_R, 0.75, na.rm = TRUE, names = FALSE),
      P95    = quantile(tau_pi_R, 0.95, na.rm = TRUE, names = FALSE),
      n      = n(),
      .groups = "drop") %>%
    filter(year_end >= 1985 & year_end < 2017)

  # Table-specific choice of reported years.
  if (namefile %in% c("table_1", "table_IA7")) {
    results <- results %>% filter(year_end %% 5 == 0)
  }
  if (namefile == "table_IA5") {
    results <- results %>% filter(year_end >= 1994)
  }

  # Math-mode headers: "$t$" for the year, "$<name>$" for every statistic.
  results <- results %>%
    rename_with(~ paste0("$", .x, "$"), .cols = -year_end) %>%
    rename("$t$" = year_end)

  kab  <- kable(results, "latex", booktabs = TRUE, digits = 4, linesep = "", escape = FALSE)
  path <- paste0("output/output_tables/", namefile, ".tex")
  writeLines(kab, con = path)
}

# Yearly moment tables: table_1, table_IA5 and table_IA7.
fn_yearly_moments(input_df = results_q_PS,             namefile = "table_1")
fn_yearly_moments(input_df = results_q_PS_unlearnable, namefile = "table_IA5")
fn_yearly_moments(input_df = results_q_rolling_clean,  namefile = "table_IA7")
```

# Regressions

```{r}

# Regress year x ventile bin averages of tau_pi_R on bin averages of firm
# characteristics (with year fixed effects) and save the slope table as LaTeX.
#
# input_df: data frame with permno, year_end, month_end and tau_pi_R. For
#           "table_IA8" / "table_IA10" the characteristics are merged in from
#           the global tables `df_q`, `ivol` and `df_io_share`; for any other
#           namefile they must already be columns of input_df.
# namefile: file stem; output goes to output/output_tables/<namefile>.tex.
#           "table_IA10" omits the two residualized rows.
fn_regression <- function(input_df, namefile) {

  # ---- 1. Assemble the firm-level sample --------------------------------
  join_keys <- c("permno" = "permno", "year_end" = "year", "month_end" = "month")

  if (namefile %in% c("table_IA8", "table_IA10")) {
    df_merged <- input_df %>%
      left_join(df_q %>% select(permno, year, month, mcap, booktomarket, turnover, analyst_count),
                by = join_keys) %>%
      left_join(ivol,        by = join_keys) %>%
      left_join(df_io_share, by = join_keys) %>%
      filter(year_end >= 1985 & year_end < 2017)
  } else {
    df_merged <- input_df %>%
      filter(year_end >= 1985 & year_end < 2017) %>%
      filter(tau_pi_R > 0)
  }

  df_merged <- df_merged %>%
    mutate(book = ifelse(booktomarket < 0, NA, booktomarket), # negative book-to-market is treated as missing
           size = log(mcap),
           analyst_count = ifelse(is.na(analyst_count), 0, analyst_count)) %>%
    drop_na()

  # ---- 2. Within-year ventiles of every characteristic ------------------
  df <- df_merged %>%
    group_by(year_end) %>%
    mutate(across(c(size, turnover, book, ivol, io_share, analyst_count), list(ntile = ~ntile(., 20)), .names = "{col}_{fn}"))

  # ---- 3. Residualize tau_pi_R on size at the size-bin level ------------
  size_bins <- df %>%
    group_by(year_end, size_ntile) %>%
    summarize(characteristic = mean(size), tau_pi_R = mean(tau_pi_R), .groups = "keep") %>%
    rename("ntile" = size_ntile) %>%
    drop_na()
  size_fit <- lm(tau_pi_R ~ characteristic + factor(year_end), data = size_bins)
  size_bins$resid <- size_fit$residuals

  # Every firm inherits the residual of its year x size bin.
  df <- df %>%
    left_join(size_bins %>% select(year_end, ntile, resid),
              by = c("year_end" = "year_end", "size_ntile" = "ntile"))

  # ---- 4. Bin-level averages for each characteristic --------------------
  # Averages the characteristic, tau_pi_R and the size residual within each
  # year x ventile bin of `char_col`; `var` records which characteristic the
  # bins belong to.
  bin_means <- function(data, char_col) {
    bin_col <- paste0(char_col, "_ntile")
    data %>%
      group_by(year_end, across(all_of(bin_col))) %>%
      summarize(characteristic = mean(.data[[char_col]]),
                tau_pi_R = mean(tau_pi_R),
                resid = mean(resid),
                var = char_col,
                .groups = "keep") %>%
      rename("ntile" = all_of(bin_col))
  }

  char_cols <- c("size", "book", "turnover", "ivol", "io_share", "analyst_count")
  bins <- lapply(char_cols, function(char_col) bin_means(df, char_col))
  names(bins) <- char_cols

  # ---- 5. Slopes on the characteristic ----------------------------------
  # Fits outcome ~ characteristic + year dummies and returns estimate, standard
  # error and t value of the characteristic. The row is selected by name so an
  # aliased characteristic raises an error instead of silently returning a
  # year-dummy coefficient.
  fit_slope <- function(data, outcome) {
    fit <- lm(as.formula(paste(outcome, "~ characteristic + factor(year_end)")), data = data)
    summary(fit)$coefficients["characteristic", c("Estimate", "Std. Error", "t value")]
  }

  slopes <- rbind(
    "Size"                     = fit_slope(bins$size,          "tau_pi_R"),
    "Value"                    = fit_slope(bins$book,          "tau_pi_R"),
    "Turnover"                 = fit_slope(bins$turnover,      "tau_pi_R"),
    "Idiosyncratic Volatility" = fit_slope(bins$ivol,          "tau_pi_R"),
    "Institutional Ownership"  = fit_slope(bins$io_share,      "tau_pi_R"),
    "Analysts Covering"        = fit_slope(bins$analyst_count, "tau_pi_R"))

  if (namefile != "table_IA10") {
    slopes <- rbind(
      slopes,
      "Institutional Ownership (Residualized)" = fit_slope(bins$io_share,      "resid"),
      "Analysts (Residualized)"                = fit_slope(bins$analyst_count, "resid"))
  }

  # ---- 6. Format column-wise and write the table ------------------------
  # format() is applied to each whole column so all rows share one layout.
  regressions <- data.frame(
    "Estimate"   = format(unname(slopes[, "Estimate"]),   digits = 2, nsmall = 4),
    "Std. Error" = format(unname(slopes[, "Std. Error"]), digits = 2, nsmall = 4),
    "t-stat"     = format(unname(slopes[, "t value"]),    digits = 2, nsmall = 2),
    row.names = rownames(slopes),
    check.names = FALSE,
    stringsAsFactors = FALSE)

  kab  <- kable(regressions, "latex", booktabs = TRUE)
  path <- paste0("output/output_tables/", namefile, ".tex")
  writeLines(kab, con = path)
}

# Characteristic regressions: table_2, table_IA8 and table_IA10.
fn_regression(input_df = results_q_PS,                  namefile = "table_2")
fn_regression(input_df = results_q_rolling_clean,       namefile = "table_IA8")
fn_regression(input_df = results_q_rolling_nocon_clean, namefile = "table_IA10")

```

# Term Structure
```{r}

# Summarize tau_pi_R by tenor and save the summary as a LaTeX table.
#
# input_df: data frame with columns tenor, year_end and tau_pi_R.
# namefile: file stem; output goes to output/output_tables/<namefile>.tex.
fn_term <- function(input_df, namefile) {
  # Strictly positive estimates with year_end in [1985, 2017).
  in_sample <- input_df %>%
    filter(tau_pi_R > 0, year_end >= 1985 & year_end < 2017)

  df_summarized <- in_sample %>%
    group_by(tenor) %>%
    summarize(
      m       = mean(tau_pi_R, na.rm = TRUE),
      mean_sd = sd(tau_pi_R, na.rm = TRUE),
      n       = n(),
      median  = quantile(tau_pi_R, 0.5, na.rm = TRUE),
      p5      = quantile(tau_pi_R, 0.05, na.rm = TRUE),
      p95     = quantile(tau_pi_R, 0.95, na.rm = TRUE))

  # Tenor becomes a factor before the numeric columns are turned into
  # formatted strings; the median is computed above but not reported.
  df_table <- df_summarized %>%
    mutate(Tenor = as.factor(tenor)) %>%
    mutate(across(where(is.numeric), ~ format(.x, digits = 3, nsmall = 2))) %>%
    select(Tenor,
           "Mean"     = m,
           "Std. Dev" = mean_sd,
           "P5"       = p5,
           "P95"      = p95,
           "N"        = n)

  kab  <- kable(df_table, "latex", booktabs = TRUE)
  path <- paste0("output/output_tables/", namefile, ".tex")
  writeLines(kab, con = path)
}

# Term-structure tables: table_3 and table_IA9.
fn_term(input_df = results_q_PS_tenor,            namefile = "table_3")
fn_term(input_df = results_q_rolling_tenor_clean, namefile = "table_IA9")

```

# Persistence

```{r}
# Cross-sectional correlation, by year, between a firm's tau_pi_R and its own
# value 1 to 5 years earlier; written to output/output_tables/table_IA6.tex.
results <- results_q_PS %>%
  filter(tau_pi_R > 0) %>%
  group_by(permno, year_end) %>%
  summarize(tau_pi_R = mean(tau_pi_R, na.rm = TRUE), .groups = "drop") %>% # take mean if multiple obs per permno per year
  # lag() works on row order, so sort chronologically within firm explicitly
  # rather than relying on the ordering summarize() happens to produce.
  arrange(permno, year_end) %>%
  group_by(permno) %>%
  # Each permno must have data for more than 1 year. Filtering first also
  # avoids calling max() on an all-NA vector for single-observation firms.
  filter(n() > 1) %>%
  # Drop permnos with gaps: consecutive years must differ by exactly 1.
  filter(max(year_end - lag(year_end), na.rm = TRUE) == 1) %>%
  mutate(
    lag_1 = lag(tau_pi_R, 1),
    lag_2 = lag(tau_pi_R, 2),
    lag_3 = lag(tau_pi_R, 3),
    lag_4 = lag(tau_pi_R, 4),
    lag_5 = lag(tau_pi_R, 5)) %>%
  group_by(year_end) %>%
  summarize(
    rho_t_1 = cor(tau_pi_R, lag_1, use = "pairwise.complete.obs"),
    rho_t_2 = cor(tau_pi_R, lag_2, use = "pairwise.complete.obs"),
    rho_t_3 = cor(tau_pi_R, lag_3, use = "pairwise.complete.obs"),
    rho_t_4 = cor(tau_pi_R, lag_4, use = "pairwise.complete.obs"),
    rho_t_5 = cor(tau_pi_R, lag_5, use = "pairwise.complete.obs"),
    .groups = "drop")

results <- results %>%
  filter(year_end >= 1986 & year_end < 2017) %>% # tiny samples otherwise
  # Lambda form instead of passing format's arguments through across()'s `...`.
  mutate(across(starts_with("rho"), ~ format(.x, digits = 4, nsmall = 2))) %>%
  rename("$t$"                        = year_end,
  "$\\rho_{t,t-1}(\\tau_{\\pi}^{R})$" = rho_t_1,
  "$\\rho_{t,t-2}(\\tau_{\\pi}^{R})$" = rho_t_2,
  "$\\rho_{t,t-3}(\\tau_{\\pi}^{R})$" = rho_t_3,
  "$\\rho_{t,t-4}(\\tau_{\\pi}^{R})$" = rho_t_4,
  "$\\rho_{t,t-5}(\\tau_{\\pi}^{R})$" = rho_t_5)

kab  <- kable(results, "latex", booktabs = TRUE, digits = 4, linesep = "", escape = FALSE)
path <- "output/output_tables/table_IA6.tex"

# Correlations that could not be computed are formatted as "NA"; blank them out
# and write the table once instead of writing, re-reading and rewriting it.
writeLines(gsub("NA", "", kab, fixed = TRUE), con = path)
  
```

# Correlation
```{r}

# Pooled correlations between the baseline, "unlearnable" and "no-constant"
# variants of tau_pi_R (strictly positive estimates only), matched on
# permno x year_end x month_end.
results <- results_q_PS %>%
  filter(tau_pi_R > 0) %>%
  select(permno, year_end, month_end, tau_pi_R_Base = tau_pi_R)

unlearnable_df <- results_q_PS_unlearnable %>%
  filter(tau_pi_R > 0) %>%
  select(permno, year_end, month_end, tau_pi_R_Unlearnable = tau_pi_R)

nocon_df <- results_q_PS_nocon %>%
  filter(tau_pi_R > 0) %>%
  select(permno, year_end, month_end, tau_pi_R_NoCon = tau_pi_R)

# Attach both variants to the baseline rows, then strip the common prefix so
# the columns read Base / Unlearnable / NoCon.
results <- results %>%
  left_join(unlearnable_df, by = c("permno", "year_end", "month_end")) %>%
  left_join(nocon_df,       by = c("permno", "year_end", "month_end")) %>%
  rename_with(~ str_remove(.x, "^tau_pi_R_"), starts_with("tau_pi_R_"))

# Correlation matrix over year_end in [1996, 2017), using pairwise-complete
# observations.
pooled_results <- results %>%
  filter(year_end >= 1996 & year_end < 2017) %>%
  select(-permno, -year_end, -month_end) %>%
  cor(use = "pairwise.complete.obs")




# Rolling estimates: same pooled correlation exercise as for the PS estimates.
results_rolling <- results_q_rolling_clean %>%
  filter(tau_pi_R > 0) %>%
  select(permno, year_end, month_end, tau_pi_R_Base = tau_pi_R)

unlearnable_df_rolling <- results_q_rolling_unlearnable_clean %>%
  filter(tau_pi_R > 0) %>%
  select(permno, year_end, month_end, tau_pi_R_Unlearnable = tau_pi_R)

nocon_df_rolling <- results_q_rolling_nocon_clean %>%
  filter(tau_pi_R > 0) %>%
  select(permno, year_end, month_end, tau_pi_R_NoCon = tau_pi_R)

# Attach both variants to the baseline rows and drop the common prefix.
results_rolling <- results_rolling %>%
  left_join(unlearnable_df_rolling, by = c("permno", "year_end", "month_end")) %>%
  left_join(nocon_df_rolling,       by = c("permno", "year_end", "month_end")) %>%
  rename_with(~ str_remove(.x, "^tau_pi_R_"), starts_with("tau_pi_R_"))

# Correlation matrix over year_end in [1996, 2017), pairwise-complete.
pooled_results_rolling <- results_rolling %>%
  filter(year_end >= 1996 & year_end < 2017) %>%
  select(-permno, -year_end, -month_end) %>%
  cor(use = "pairwise.complete.obs")



# Share (in percent) of observations per year with a strictly positive
# tau_pi_R. Missing estimates count towards n_obs but not towards n_pos.
pos_share <- results_q_PS %>%
  group_by(year_end) %>%
  summarise(n_pos = sum(as.numeric(tau_pi_R > 0), na.rm = TRUE),
            n_obs = n(),
            .groups = "drop") %>%
  mutate(pos_share = (n_pos / n_obs) * 100)

# pos_share %>%
#   summarise(pos_share = mean(pos_share))

```